rm(list=ls())

# Load Packages
library(haven)
library(tidyverse)
library(data.table)
library(binsreg)
library(lfe)
library(ivreg)

source("code/config.R")
source(paste0(CODE,"parameters.R"))

# Read in data
df<-read_csv(paste(DATA, "clean_data2.csv", sep=""))
df_post<-read_csv(paste(DATA, "clean_data_post2.csv", sep=""))

# Merge in rent data
rents<-read_csv(paste(DATA, "REALIS_retail_rent2015.csv", sep=""))
rents_post<-read_csv(paste(DATA, "REALIS_retail_rent2018.csv", sep=""))
rents <- rents %>% dplyr::select(sub_area_o=X1,residential_rent=Median)
rents$sub_area_o<- toupper(rents$sub_area_o)
rents_post <- rents_post %>% dplyr::select(sub_area_o=X1,residential_rent=Median)
rents_post$sub_area_o<- toupper(rents_post$sub_area_o)
df_o<-left_join(df_o,rents)

# Compute variables
df_o<- df %>% filter(!is.na(res_fe_S) & !is.na(res_fe_L) & !is.na(res_fe_S_w) & !is.na(res_fe_L_w)) %>% select(aHome,sub_area_o,residents,residents_w,res_fe_L_w,res_fe_L,res_fe_S,res_fe_S_w) %>% distinct()
df_o<-df_o %>% mutate(share_resident= residents/sum(residents) , share_resident_w= residents_w/sum(residents_w) )
df_o$BIGW<-gamma((epilson_l-1)/epilson_l)*exp(-df_o$res_fe_L)^(1/epilson_l)
df_o$BIGS<-gamma((epilson_s-1)/epilson_s)*exp(-df_o$res_fe_S)^(alpha_N_p/epilson_s)
df_o$BIGW_w<-gamma((epilson_l_w-1)/epilson_l_w)*exp(-df_o$res_fe_L_w)^(1/epilson_l_w)
df_o$BIGS_w<-gamma((epilson_s_w-1)/epilson_s_w)*exp(-df_o$res_fe_S_w)^(alpha_N_m/epilson_s_w)
df_o$share_p<-df_o$share_resident*total_pop/df_o$share_resident_w*total_pop_w

# Compute variables in post period dataset
df_o_post<- df_post %>% filter(!is.na(res_fe_S) & !is.na(res_fe_L) & !is.na(res_fe_S_w) & !is.na(res_fe_L_w)) %>% select(aHome,sub_area_o,residents,residents_w,res_fe_L_w,res_fe_L,res_fe_S,res_fe_S_w) %>% distinct()
df_o_post<-df_o_post %>% mutate(share_resident= residents/sum(residents) , share_resident_w= residents_w/sum(residents_w) )
df_o_post$BIGW<-gamma((epilson_l-1)/epilson_l)*exp(-df_o_post$res_fe_L)^(1/epilson_l)
df_o_post$BIGS<-gamma((epilson_s-1)/epilson_s)*exp(-df_o_post$res_fe_S)^(alpha_N_p/epilson_s)
df_o_post$BIGW_w<-gamma((epilson_l_w-1)/epilson_l_w)*exp(-df_o_post$res_fe_L_w)^(1/epilson_l_w)
df_o_post$BIGS_w<-gamma((epilson_s_w-1)/epilson_s_w)*exp(-df_o_post$res_fe_S_w)^(alpha_N_m/epilson_s_w)
df_o_post<-left_join(df_o_post,rents_post)
df_o_post$share_p<-df_o_post$share_resident*total_pop/df_o_post$share_resident_w*total_pop_w

# Calculate deltas
df_o$change_share_resident<- log(df_o_post$share_resident)-log(df_o$share_resident)
df_o$change_QWS<- log( (df_o_post$residential_rent^-alpha_H_p)*df_o_post$BIGW*df_o_post$BIGS)-log( (df_o$residential_rent^-alpha_H_p)*df_o$BIGW*df_o$BIGS)
df_o$change_W<- log( df_o_post$BIGW)-log(df_o$BIGW)
df_o$change_S<- log( df_o_post$BIGS)-log(df_o$BIGS)
df_o$change_Q<- log( df_o_post$residential_rent)-log(df_o$residential_rent)
df_o$change_share_plus<-log(df_o_post$share_p)-log(df_o$share_p)
df_o$change_share_minus<-log(1/df_o_post$share_p)-log(1/df_o$share_p)
df_o$change_share_resident_w<- log(df_o_post$share_resident_w)-log(df_o$share_resident_w)
df_o$change_QWS_w<- log( (df_o_post$residential_rent^-alpha_H_m)*df_o_post$BIGW_w*df_o_post$BIGS_w)-log( (df_o$residential_rent^-alpha_H_m)*df_o$BIGW_w*df_o$BIGS_w)

# Estimation: Table A10
out1<-(ivreg(change_share_resident ~ change_QWS+change_share_plus, data=df_o))
out2<-(ivreg(change_share_resident_w ~ change_QWS_w+change_share_minus, data=df_o))
stargazer(out1,out2)

# Extract estimated parameters
df$epilson_r<-as.numeric(out1$coefficients[2])
df$mu_r<-as.numeric(out1$coefficients[3])/as.numeric(out1$coefficients[2])
df$epilson_r_w<-as.numeric(out2$coefficients[2])
df$mu_r_w<-as.numeric(out2$coefficients[3])/as.numeric(out2$coefficients[2])

# Save
write_csv(df,paste(DATA, "clean_data4.csv", sep=""))

# Merge in residential amenities data
CCs <- read_csv("data/ccBySubzone.csv")
SSO <- read_csv("data/ssoBySubzone.csv")
schools <- read_csv("data/schoolsBySubzone.csv")
parks <- read_csv("data/parksBySubzone.csv")
df_o<-left_join(df_o,CCs)
df_o<-left_join(df_o,parks)
df_o<-left_join(df_o,schools)
df_o<-left_join(df_o,SSO)

# Compute variables
df_o$BIGB<-df_o$residential_rent^alpha_H_p*df_o$share_resident^(1/epilson_R_p) / (df_o$BIGS*df_o$BIGW)
df_o$BIGB_w<-df_o$residential_rent^alpha_H_m*df_o$share_resident_w^(1/epilson_R_m) / (df_o$BIGS_w*df_o$BIGW_w)
df_o$schools<-df_o$nbd+df_o$SAP+df_o$GEPSAP+df_o$GEP
df_o$elite<-df_o$SAP+df_o$GEPSAP+df_o$GEP
df_o$cc[is.na(df_o$cc)]<-0
df_o$parks[is.na(df_o$parks)]<-0
df_o$schools[is.na(df_o$schools)]<-0
df_o$sso[is.na(df_o$sso)]<-0
df_o$elite[is.na(df_o$elite)]<-0
df_o$GEP[is.na(df_o$GEP)]<-0
df_o$GEPSAP[is.na(df_o$GEPSAP)]<-0
df_o$SAP[is.na(df_o$SAP)]<-0

# Regressions: Table A11
out1<-(lm((log(BIGB))~scale(parks)+share_p, df_o ))
out2<-(lm((log(BIGB))~scale(cc)+share_p, df_o ))
out3<-(lm((log(BIGB))~scale(schools)+share_p, df_o ))
out5<-(lm((log(BIGB))~scale(nbd)+share_p, df_o ))
out1a<-(lm((log(BIGB_w))~scale(parks)+share_m, df_o ))
out2a<-(lm((log(BIGB_w))~scale(cc)+share_m, df_o ))
out3a<-(lm((log(BIGB_w))~scale(schools)+share_m, df_o ))
out5a<-(lm((log(BIGB_w))~scale(nbd)+share_m, df_o ))
stargazer(out1,out1a,out2,out2a,out3,out3a,out5,out5a)

